# -*- coding: utf-8 -*-

import io
import tarfile
import time

import pandas as pd

def save_in_chunks(data, chunk_size_mb=1000, base_filename='D:\\Code\\subject-word-extraction\\data\\output\\large_file'):
    """Split *data* into fixed-size pieces and store each one in its own ``.tar.gz``.

    Piece number N (1-based) is written to ``{base_filename}_partN.tar.gz`` and
    contains a single member named ``temp_chunk_N.txt``. The member is built
    in memory, so no scratch files are created in the working directory.

    Args:
        data: The content to split. ``str`` input is sliced by characters and
            each slice is encoded as UTF-8; ``bytes``/``bytearray`` input is
            sliced by bytes and stored unchanged.
        chunk_size_mb: Slice length in units of 1024 * 1024 elements
            (characters for ``str``, bytes for binary input). For ``str``
            data containing multi-byte characters the encoded member can
            therefore be larger than this many megabytes. May be fractional.
        base_filename: Path prefix for the generated archives.

    Raises:
        TypeError: If *data* is not ``str``, ``bytes`` or ``bytearray``.
        ValueError: If *chunk_size_mb* yields a slice length below one.
    """
    # Validate before touching the disk so bad input leaves no partial output.
    if not isinstance(data, (str, bytes, bytearray)):
        raise TypeError(
            f"data must be str, bytes or bytearray, not {type(data).__name__}"
        )

    chunk_size = int(chunk_size_mb * 1024 * 1024)  # convert MB to element count
    if chunk_size < 1:
        raise ValueError("chunk_size_mb must yield a chunk size of at least 1")

    for part, start in enumerate(range(0, len(data), chunk_size), start=1):
        chunk = data[start:start + chunk_size]
        # Explicit UTF-8 keeps the output independent of the platform locale
        # and avoids text-mode newline translation.
        payload = chunk.encode('utf-8') if isinstance(chunk, str) else bytes(chunk)

        member = tarfile.TarInfo(name=f'temp_chunk_{part}.txt')
        member.size = len(payload)  # addfile() copies exactly this many bytes
        member.mtime = int(time.time())  # TarInfo defaults to the epoch otherwise

        filename = f"{base_filename}_part{part}.tar.gz"
        with tarfile.open(filename, 'w:gz') as tar:
            tar.addfile(member, io.BytesIO(payload))
        print(f"Saved: {filename}")

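# Example usage (save_in_chunks expects text, so serialize the DataFrame first):
# if __name__=="__main__":
#     df = pd.read_csv("D:\\Code\\subject-word-extraction\\data\\output\\annual_report.csv")
#     save_in_chunks(df.to_csv(index=False))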
